# script to pull all observed MMSI-IMO Pairs from raw AIS data

import arcpy
#import numpy
#import datetime
#import time
import calendar
#import os
import pandas

#from numpy.lib.recfunctions import append_fields

# Turn on arcpy's overwriteOutput environment flag for this session.
# NOTE(review): the only arcpy call visible in this script is
# arcpy.da.TableToNumPyArray (a read) -- confirm whether this flag is still needed.
arcpy.env.overwriteOutput=True

#numpy.set_printoptions(threshold=numpy.nan)

# ---- run configuration -------------------------------------------------
# The years to process are assigned further down, next to the loop that
# uses them.

# Month numbers 1..12; used for the file names and to look up month names.
months = range(1, 13)
# Zone numbers that appear in the AIS file / geodatabase names (3 through 11).
zones = list(range(3, 12))
# For a quick trial run, restrict to one month and one zone instead:
#months = [1]
#zones = [3]

# Destination CSV for the combined vessel list.
save_file = r"H:\My Drive\Boats\ReplicationCode\data\AIS\vessel_list.csv"
# Fields requested from each source table.
var_list = ['MMSI', 'IMO', 'VesselType', 'Length', 'Width']
# Empty filter expression handed to arcpy when reading the vessel tables.
where_clause = ''

# Read every 2015-2016 monthly/zone point CSV and collect the distinct
# MMSI / IMO / VesselType / Length / Width rows found in each file.
years = [2015,2016]
# Path pattern: <year>/<MM>_<MonthName>_<year>/AIS_<year>_<MM>_Zone<ZZ>.csv
pts_template = r"H:/My Drive/Boats/ReplicationCode/data/AIS/%s/%02d_%s_%s/AIS_%s_%02d_Zone%02d.csv"
frames1 = []
for year in years :
    for month in months :
        print ( month )  # progress indicator: one line per (year, month)
        month_name = calendar.month_name[month]
        for zone in zones :
            pts_file = pts_template % (year, month, month_name, year, year, month, zone)
            # Reuse var_list so the columns read here always match the ones
            # requested from the 2009 vessel tables.
            df_pts = pandas.read_csv(pts_file, usecols=var_list)
            # De-duplicate per file so the list of frames held in memory stays small.
            frames1.append(df_pts.drop_duplicates())

df1 = pandas.concat(frames1)
# Remove the literal "IMO" prefix from the IMO column.  str.strip('IMO') treats
# its argument as a set of characters and would strip any run of I/M/O from
# both ends of the value, so anchor on the prefix explicitly instead.
# NOTE(review): IMO holds strings after this step; confirm the 2009 tables
# deliver IMO in a comparable form, otherwise the later drop_duplicates on the
# combined frame cannot match the same vessel across the two sources.
df1['IMO'] = df1['IMO'].str.replace(r'^IMO', '', regex=True)

# Load the "Vessel" table from each 2009 monthly/zone .gdb into a DataFrame.
# Path pattern: 2009/<MM>_<MonthName>_2009/Zone<Z>_2009_<MM>.gdb/Vessel
gdb_template = r"H:/My Drive/Boats/ReplicationCode/data/AIS/2009/%02d_%s_2009/Zone%s_2009_%02d.gdb/Vessel"
frames = []
for mm in months :
    print ( mm )  # progress indicator: one line per month
    month_label = calendar.month_name[mm]
    for zz in zones :
        table_path = gdb_template % (mm, month_label, zz, mm)
        # Only the var_list fields are requested; -9999 is passed as null_value.
        records = arcpy.da.TableToNumPyArray(
            table_path,
            var_list,
            where_clause,
            null_value = -9999,
        )
        frames.append(pandas.DataFrame(records))

# Stack the 2009 tables first, then append the 2015-2016 rows and keep one
# copy of every distinct row.
vessels_2009 = pandas.concat(frames)
df = pandas.concat([vessels_2009, df1]).drop_duplicates()

# Keep only rows whose MMSI is above 201000000; anything at or below that
# value is treated as a bad MMSI.
valid_mmsi = df['MMSI'] > 201000000
df = df.loc[valid_mmsi]

# Write the vessel list as a comma-separated file with a header row and no
# index column, then report where it went.
df.to_csv(save_file, sep=',', index=False, header=True)
print ("Saved File:")
print (save_file)
